********************************
* This file creates Table A1
* Note: Unlike all other fiiles,
* This file does not use build.dta
* The reason is that build.dta drops 
* counties that border TVA
* Here, we need counties that border TVA
********************************
clear all
set mem 4000m
set matsize 2000
set maxvar 8000


**************************
* TVA DUMMIES
**************************
infile fips using ~/tva/data/tva_county_dummy/tvacounties.txt
summ
g tva = 1
sort fips
save tmp, replace

***************************
* READ STATE-LEVEL DATA
* AND TAKE 1930 INCOME VARIABLE
***************************
u ~/tva/data/valentina/state_variables/state_level_data
keep state pcp_income_30
sort state
save tmp7, replace


***************************
* READ WAGE DATA
* THESE DATA ARE THE
* CORRECT MANUF AND TRADE WAGES.
* IN TVA, WAGE DATA HAVE PROBLEMS
* VALENTINA
* FIXED THEM AND PUT IT IN TVA1
******************************
u ~/tva/data/valentina/county_variables/new/tva1
keep fips mwage* pcmwage* pctwage* var88_county72 var89_county72
duplicates report fips
duplicates drop fips, force
sort fips
save tmp76, replace


***************************
* READ COUNTY-LEVEL DATA
***************************
u ~/tva/data/valentina/county_variables/tva_update //this file is built by tva_update.do (certified by Valentina 7/14/2011)
duplicates report fips
drop mwage* twage*
drop _merge
sort fips
merge 1:1 fips using tmp76
tab _merge
keep if _merge ==3
drop _merge

merge 1:1 fips using tmp
tab _merge
drop if _merge ==2
replace tva = 0 if tva ==.

****************************
*  Merge on Fishback Data  *
****************************

drop _merge
drop if county==.|state==.

merge 1:1 county state using /accounts/projects/tva/tva/data/topography/fishback
tab _merge
drop if _merge==2
drop N10

****************************
*  Merge on Ag Land Values *
****************************
drop _merge

merge 1:1 fips using /accounts/projects/tva/tva/data/agricultural_land/data

tab _merge
drop if _merge==2
duplicates report fips

****************************
*  Merge on TVA 'Donut'    *
****************************
drop _merge
joinby fips using /accounts/projects/tva/tva/data/border_counties/donut, unmatched(both)
tab _merge
drop if _merge==2

****************************
*  Merge on fata with neighbors of neighbors
****************************
drop _merge
joinby fips using /accounts/projects/tva/tva/data/border_counties/neighbors_of_neighbors/donut2, unmatched(both)
tab _merge
drop if _merge==2



****************************
*  Merge on Employment     *
****************************
drop _merge 
merge 1:1 fips using ~/tva/data/valentina/county_variables/enrico/enrico_jobs

tab _merge
drop if _merge==2
duplicates report fips


*******************************
*  Merge on Housing Val/Rents *
*******************************

drop _merge 
merge 1:1 fips using ~/tva/data/valentina/county_variables/RawData/housingvals

tab _merge
drop if _merge==2
duplicates report fips


drop _merge 
merge 1:1 fips using ~/tva/data/valentina/county_variables/RawData/county62_1, keepusing(var61_county62 var63_county62)

ren var61_county62 medhsval60
ren var63_county62 medrnt60
tab _merge
drop if _merge==2
duplicates report fips



*******************************
*  Merge on 1890 Vars 	      *
*******************************

drop _merge 
merge 1:1 fips using ~/tva/data/valentina/county_variables/1890/data.dta

tab _merge
drop if _merge==2
duplicates report fips


*******************************
* Merge on weather variables  *
*******************************
drop _merge

merge 1:1 fips using /accounts/projects/tva/tva/data/weather/JUL_MEAN_IDW200_365_1968_2002.dta
foreach var of varlist tmin tmean tmax{
	ren `var' `var'_jul
}
drop if _merge==2
drop _merge

merge 1:1 fips using /accounts/projects/tva/tva/data/weather/JAN_MEAN_IDW200_365_1968_2002.dta
foreach var of varlist tmin tmean tmax{
	ren `var' `var'_jan
}
drop if _merge==2




****************************
*  Cleanup and Definition  *
****************************

* State and region
drop state
g state = int(fips/1000)
drop if state ==51 | state ==52 | state ==2 | state ==3 | state ==15
g northeast =0
g midwest=0
g south=0
g west=0
replace northeast =1 if state == 9 | state == 23 | state == 25 | state == 33 | state == 44 | state  == 50 | state == 34 | state == 36 | state == 42
replace midwest=1    if state == 17 | state == 18 | state == 26 | state == 39 | state == 55 | state ==19 | state == 20 | state == 27 | state == 29 | state == 31 | state == 38 | state == 46
replace south=1      if state ==10 | state == 11 | state == 12 | state == 13 | state == 24 | state  == 37 | state == 45 | state == 51   | state == 54 | state == 1 | state == 21 | state == 28  | state == 47 | state == 5 | state == 22 | state == 40 | state == 48
replace west=1       if state == 4 | state == 8 | state == 16 | state == 30 | state == 32 | state == 35 | state == 49 | state == 56 | state == 2 | state == 6 | state == 15 | state == 41 | state ==53
g       region =1 if northeast ==1
replace region =2 if midwest ==1
replace region =3 if south ==1
replace region =4 if west ==1

* CPI
g cpi890= 5 //need to get an official number here
g cpi0  = 7 
g cpi10 = 9 
g cpi20 = 20
g cpi30 = 16.7
g cpi40 = 14
g cpi50 = 24.1
g cpi60 = 29.6
g cpi70 = 38.8
g cpi80 = 82.4
g cpi90 = 130.7
g cpi2000 = 172.2


* Merge the state-level variable
drop _merge
sort state
merge state using tmp7
tab _merge
drop if tva ==.
rm tmp.dta
rm tmp7.dta
rm tmp76.dta

***********************************************
*Drop Donut and counties with missing lat/long*
***********************************************
* XXXXXX
** drop if border_county==1|latitude==.|longitud==. 
tab border_county
drop if latitude==.|longitud==.


*******************************************************
* Drop counties with very low populations in any year *
*******************************************************

drop if pop0<1000|pop10<1000|pop20<1000|pop30<1000|pop40<1000|pop50<1000|pop60<1000|pop70<1000|pop80<1000|pop90<1000|pop2000<1000



********************************
* NEW VARIABLES
********************************

* Per capita wage in manufacturing, in real dollars
* Manufacturing payroll of production workers over average number of production workers 
g wage890    = (mwage890)/(cpi890/100)
g wage0    = (pcmwage00)/(cpi0/100)
g wage20   = (pcmwage20)/(cpi20/100)
g wage30   = (pcmwage30)/(cpi30/100)
g wage40   = (pcmwage39)/(cpi40/100)
g wage50   = (pcmwage47)/(cpi50/100)
g wage60   = (pcmwage58)/(cpi60/100)
g wage70   = (pcmwage67)/(cpi70/100)
g wage80   = (pcmwage82)/(cpi80/100)
g wage90   = (pcmwage87)/(cpi90/100)
g wage2000 = (pcmwage97)/(cpi2000/100)


* Average manufacturing wages of production workers
g prodwage40=manuf_prod_wages39/manuf_prod39
g prodwage50=manuf_prod_wages47/manuf_prod47
g prodwage60=manuf_prod_wages58/manuf_prod58
g prodwage70=manuf_prod_wages67/manuf_prod67
g prodwage90=manuf_prod_wages87/manuf_prod87
g prodwage2000=manuf_prod_wages97/manuf_prod97

summ wage*, detail
summ prodwage*, detail
correlate wage40 prodwage40
correlate wage50 prodwage50
correlate wage60 prodwage60
correlate wage70 prodwage70


g Dwage30 = log(wage30)- log(wage20)
g Dwage40 = log(wage40)-log(wage30)
g Dwage50 = log(wage50)-log(wage40)
g Dwage60 = log(wage60)-log(wage50)
g Dwage70 = log(wage70)-log(wage60)

g Dprod60 = log(prodwage60) - log(prodwage50)
g Dprod70 = log(prodwage70) - log(prodwage60)
g Dprod90 = log(prodwage90) - log(prodwage2000)

summ Dwage*, detail
summ Dprod*, detail

correlate Dwage60 Dprod60
correlate Dwage70 Dprod70




* Per capita wage in trade, in real dollars
* total payroll in wholesale establishments + retail establishments / (total workers in retail+ in wholsale)
g twage30  = pctwage30/(cpi30/100) 
g twage40  = pctwage40/(cpi40/100)
g twage50  = pctwage54/(cpi50/100)
g twage60  = pctwage63/(cpi60/100)
g twage70  = pctwage72/(cpi70/100)
g twage80  = pctwage82/(cpi80/100)
g twage90  = pctwage87/(cpi90/100)
g twage2000  = pctwage97/(cpi2000/100)

* Agricultural values

g lnfaval890 = ln(faval890/(cpi890/100))
g lnfaval0 = ln(faval900/(cpi0/100))
g lnfaval10 = ln(faval910/(cpi10/100))
g lnfaval20 = ln(faval920/(cpi20/100))
g lnfaval30 = ln(faval930/(cpi30/100))
g lnfaval40 = ln(faval940/(cpi40/100))
g lnfaval50 = ln(faval950/(cpi50/100))
g lnfaval60 = ln(faval959/(cpi60/100))
g lnfaval70 = ln(faval1969/(cpi70/100))
g lnfaval80 = ln(faval1982/(cpi80/100))
g lnfaval90 = ln(faval1992/(cpi90/100))
g lnfaval2000=ln(faval2002/(cpi2000/100))


* Median family income
gen lnmedfaminc50  = ln(medfaminc50/(cpi50/100)) 
gen lnmedfaminc60  = ln(medfaminc60/(cpi60/100)) 
gen lnmedfaminc70  = ln(medfaminc70/(cpi70/100)) 
gen lnmedfaminc80  = ln(medfaminc80/(cpi80/100)) 
gen lnmedfaminc90  = ln(medfaminc80/(cpi90/100)) 
gen lnmedfaminc2000= ln(medfaminc2000/(cpi2000/100))

* Farm production
gen lnvfprod30   = log(vfprod30/(cpi30/100))
gen lnvfprod40   = log(vfprod40/(cpi40/100))
gen lnvfprod50   = log(vfprod50/(cpi50/100))
gen lnvfprod60   = log(vfprod60/(cpi60/100))
gen lnvfprod70   = log(vfprod70/(cpi70/100))
gen lnvfprod80   = log(vfprod80/(cpi80/100))
gen lnvfprod90   = log(vfprod90/(cpi90/100))
gen lnvfprod2000 = log(vfprod2000/(cpi2000/100))

*Foreign born
gen fb0=fbwmtot00 + fbwftot00 
gen fb10=fbwtot10
gen fb20=fbwmtot20 + fbwftot20 
gen fb30=fbwmtot + fbwftot

gen fbshr20=fb20/(wmtot20 + wftot20)
gen fbshr30=fb30/(wmtot + wftot)

*Housing Values/Rents
*ren medval medhsval30
*ren medrent medrnt30 //different definition of rent

ren medrnt30_NHGIS medrnt30
ren medhsval30_NHGIS medhsval30
ren var88_county72 medhsval70
ren var89_county72 medrnt70

foreach var in medhsval medrnt{
	foreach yr in 30 40 50 60 70 80 90 2000{
		cap replace `var'`yr'=`var'`yr'/(cpi`yr'/100)
	}
}

*various
drop other60

*drop counties experiencing big changes in area
gen d=(b1_lnd01_county00 - area)/(b1_lnd01_county00 + area)/2
drop if abs(d)>.03
replace area=(b1_lnd01_county00 + area)/2



******************************
* standardize variable names *
******************************


ren emp00 emp0
ren manuf_jobs_00 manuf_jobs_0

foreach yr in 0 10 20 30 40 50 60 70 80 90 2000{
	cap drop manuf`yr'
	cap drop agr`yr'
	ren manuf_jobs_`yr' manuf`yr'
	cap ren ag_jobs`yr' agr`yr'
}
ren manuf_jobs890 manuf890



foreach yr in 0 10 20 30 60 80 90 2000{
	gen other`yr'=emp`yr'-agr`yr'-manuf`yr'
}


*********************
*    Make Share     *
*********************

foreach var in manuf agr{
	foreach yr in 0 10 20 30 40 50 60 70 80 90 2000{
		cap gen `var'shr`yr'=`var'`yr'/emp`yr'
	}
}

********************
* prepare outcomes *
********************

foreach var in pop emp house wage twage agr manuf other medhsval medrnt fb prodwage{
	cap gen ln`var'890=ln(`var'890)
	cap gen ln`var'0=ln(`var'0)
	cap gen ln`var'10=ln(`var'10)
	cap gen ln`var'20=ln(`var'20)
	cap gen ln`var'30=ln(`var'30)
	cap gen ln`var'40=ln(`var'40)
	cap gen ln`var'50=ln(`var'50)
	cap gen ln`var'60=ln(`var'60)
	cap gen ln`var'70=ln(`var'70)
	cap gen ln`var'80=ln(`var'80)
	cap gen ln`var'90=ln(`var'90)
	cap gen ln`var'2000=ln(`var'2000)
}

drop lntwage20  lnmedhsval20 lnmedrnt20 //stata confuses 20 and 2000


*******************
*Generate Controls*
*******************
g urbshare0=popurb0/pop0
g urbshare10=popurb10/pop10
g urbshare20=popurb20/pop20
g urbshare30=popurb30/pop20
gen popdens0=pop0/b1_lnd01_county00


*fix zeros in covariate quantities

foreach var in agr manuf other{
	foreach yr in 10 20 30{
		cap replace ln`var'`yr'=0 if `var'`yr'<=0
		gen no`var'`yr'dum=`var'`yr'<=0
	}
}

ren mfgcap00 mfgcap0
gen lnmfgcap0=ln(mfgcap0)
replace lnmfgcap0=0 if mfgcap0==0



*fix wages

foreach var in wage twage{
	foreach yr in 20 30{
		cap replace ln`var'`yr'=-1 if `var'`yr'==.
		cap gen no`var'`yr'dum=`var'`yr'==.
	}
}



*transformations
foreach var of varlist elevmax elevrang popdens0 tillit10 tillit1020 tillit1010 retsales radiorep totunemp area{
	gen ln`var'=ln(`var')
}


center tmean* lnelevmax

foreach var of varlist c_lnelevmax white0 white20 white30 c_tmean* lnmanuf0 lnmanuf10 lnmanuf20 lnradiorep lnemp20 lnemp30 lnwage0 lnwage20 lnwage30 lntwage30 lnpop0 lnpop20 lnpop30 lnfaval0 lnfaval20 lnfaval30 tmean*{
	gen `var'sq=`var'^2
	gen `var'cub=`var'^3
}

gen popdifsq=(lnpop30-lnpop20)^2
gen empdifsq=(lnemp30-lnemp20)^2
gen manufdifsq=(lnmanuf20-lnmanuf0)^2
gen urbsharedifsq=(urbshare20-urbshare0)^2
gen whitedifsq=(white20-white0)^2
gen agrdifsq=(lnagr20-lnagr0)^2
gen wagedifsq=(lnwage20-lnwage0)^2
gen favaldifsq=(lnfaval20-lnfaval0)^2

gen agrshr30sq=agrshr30^2
gen agrshr20sq=agrshr20^2

gen lnagr20sq=lnagr20^2
gen lnagr0sq=lnagr0^2
gen fbdifsq=(lnfb20-lnfb0)^2

gen wagedif2sq=(lnwage30-lnwage20)^2
gen tmean_jan_jul=tmean_jan*tmean_jul

gen pctil20=tillit1020/t10tot20
gen pctil30=tillit10/t10tot
replace PRADIO=PRADIO/100
gen urate30=totunemp/(totunemp+emp30)


global X "lnelevmax lnelevrang lnarea lnpop20 lnpop20sq lnpop30 lnpop30sq popdifsq agrshr20 agrshr20sq agrshr30 agrshr30sq manufshr20 manufshr30 nowage20 nowage30 lnwage20 lnwage30 notwage30 lntwage30 lnemp20 lnemp30 urbshare20 urbshare30 lnfaval20 lnfaval30 lnmedhsval30 lnmedrnt30 white20 white20sq white30 white30sq pctil20 pctil30 PRADIO urate30 fbshr20 fbshr30"


run "~/geo/x_ols_JP_v11.ado" //use Juan Pablo's version of Conley (1999)
global tables="tables"


*******************************************
*******************************************
* Propensity score 
*******************************************
*******************************************

sum $X

logit tva $X, cluster(state)
predict phat
replace phat = . if border_coun ==1 

keep if e(sample)==1

sum phat if tva==1, det
sum phat if tva==0, det
local cut=r(p25)

tab tva

preserve
drop if phat<`cut'&tva==0
**Construct O-B weights**

mata: D=st_data(.,"tva")
mata: one=J(rows(D),1,1)
mata: nD=one-D
order $X
local K:word count $X
mata: X=st_data(.,1..`K')
mata: X=(one, X)
mata: w=D'*X*invsym(quadcross(X,nD,X))*X'/sum(D)
mata: w=w':*nD
gen w=.
mata: st_store(.,"w",w)

replace w=1 if tva==1
sum w if tva==0
sum w if tva==1


restore
gen w=1 //weights





drop if phat<`cut'



local i=1
foreach var in lnpop lnemp lnhouse lnwage manufshr agrshr lnfaval{	
	g  Dy = (`var'40-`var'0)/4

	qui centile Dy, c(1 99)
	qui replace Dy=r(c_1) if Dy<r(c_1)
	qui replace Dy=r(c_2) if Dy>r(c_2)&Dy!=.

	qui reg Dy tva, cluster(state) 
	eststo base`i'
	qui x_ols Dy $X, cluster(state) bo(tva)
	eststo eval`i'
	qui x_ols Dy $X, lat(latitude) long(longitud) cut1(200) cut2(200) bo(tva)
	eststo cor_eval`i'
	drop Dy 
	local ++i
}






local i=1
foreach var in lnpop lnemp lnhouse lnwage manufshr agrshr lnfaval{	
	g  Dy = (`var'40-`var'0)/4

	qui centile Dy, c(1 99)
	qui replace Dy=r(c_1) if Dy<r(c_1)
	qui replace Dy=r(c_2) if Dy>r(c_2)&Dy!=.

	qui x_ols Dy tva if south==1, lat(latitude) long(longitud) cut1(200) cut2(200)
	eststo base`i', title(`var')
	qui x_ols Dy $X if south==1, lat(latitude) long(longitud) cut1(200) cut2(200) bo(tva)
	eststo cor_eval`i', title(`var')
	drop Dy 
	local ++i
}




******************************
*****************************
*****************************
******************************


replace border1 = 0 if border1 ==.
replace border2 = 0 if border2 ==.
summ fips border* tva
tab border_county border1
tab tva border1
tab tva border2
* some of the border2 counties are within tva
replace border2 = 0 if tva ==1
tab border1
tab border2
drop if tva ==1
drop tva
rename border2 tva

local i=1
foreach var in lnpop lnemp lnwage lnprodwage lntwage lnagr lnmanuf lnother lnvfprod lnmedfaminc lnfaval lnmedhsval lnmedrnt manufshr agrshr{
	
	if "`var'"=="lnmedfaminc"{
		g Dy=(`var'2000-`var'50)/5
	}
	else{
		g  Dy = (`var'2000-`var'40)/6
	}

	qui centile Dy, c(1 99)
	qui replace Dy=r(c_1) if Dy<r(c_1)
	qui replace Dy=r(c_2) if Dy>r(c_2)&Dy!=.

	qui reg Dy tva, cluster(state) 
	eststo base`i', title(`var')
	qui x_ols Dy $X, cluster(state) bo(tva)
	eststo eval`i', title(`var')
	qui x_ols Dy $X, lat(latitude) long(longitud) cut1(200) cut2(200) bo(tva)
	eststo cor_eval`i', title(`var')
	drop Dy 
	local ++i
}


esttab base* using base2.csv, replace keep(tva) mlabels(lnpop lnemp lnwage lnprodwage lntwage lnagr lnmanuf lnother  lnvfprod lnmedfaminc lnfaval lnmedhsval lnmedrnt manufshr agrshr) b(3) se(3) star(* 0.1 ** 0.05 *** 0.01)
esttab eval* using eval2.csv, replace keep(tva) mlabels(lnpop lnemp lnwage lnprodwage lntwage lnagr lnmanuf lnother  lnvfprod lnmedfaminc lnfaval lnmedhsval lnmedrnt manufshr agrshr) b(3) se(3) star(* 0.1 ** 0.05 *** 0.01)
esttab cor_* using cor2.csv, replace keep(tva) mlabels(lnpop lnemp lnwage lnprodwage lntwage lnagr lnmanuf lnother  lnvfprod lnmedfaminc lnfaval lnmedhsval lnmedrnt manufshr agrshr) b(3) se(3) star(* 0.1 ** 0.05 *** 0.01)





